library(tidyverse)
library(ggplot2)
library(foreign)
library(doBy)
library(data.table)
# Load dataset.
# NOTE: this load/filter/relabel sequence previously appeared twice back to
# back (together with a second copy of the library() calls). The second pass
# overwrote the first with the same result, so the file was read from disk
# twice for nothing. It now runs once.
repay_main_extend <- read.dta("/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/data/repayment/fenix_repay_extend_07172020_strict_rep.dta")
# Drop loandayselapsed more than 200 days
repay_main_extend <- subset(repay_main_extend, loandayselapsed <= 200)
# Make treatment assignment labels simpler: raw code -> display label
treatment_labels <- c(
  "R T1-L" = "Secured",
  "R T1-U" = "Surprise Unsecured",
  "R T2-U" = "Unsecured",
  "R T3" = "Choice",
  "R C" = "Control"
)
for (raw_label in names(treatment_labels)) {
  # Rows still carrying this raw code; rows where the comparison is NA are
  # left untouched by the scalar assignment below
  has_raw_label <- repay_main_extend$treatmenttype_sh == raw_label
  repay_main_extend$treatmenttype_sh[has_raw_label] <-
    treatment_labels[[raw_label]]
}
# Ordered-factor copy of the treatment label with an explicit level order
# (needed for graphing). Labels not listed in `levels` become NA.
repay_main_extend$treatmenttype_fac <- factor(
  repay_main_extend$treatmenttype_sh,
  levels = c("Secured", "Surprise Unsecured", "Unsecured", "Choice", "Control"),
  ordered = TRUE
)
# Versions of the data without the control arm, and without control and choice.
# Rows whose label is NA are dropped by both filters.
repay_main_extend_noctrl <- subset(
  repay_main_extend,
  treatmenttype_sh != "Control"
)
repay_main_extend_noctrlnochoice <- subset(
  repay_main_extend_noctrl,
  treatmenttype_sh != "Choice"
)
# Summary dataset: for each value of loandayselapsed and each treatment arm,
# the mean and standard deviation of frac_lpp_maxip (the mean is stored in
# column frac_lpp_maxip.mean, which the repayment-rate graph plots).
rr_data <- summaryBy(
  frac_lpp_maxip ~ loandayselapsed + treatmenttype_fac,
  FUN = c(mean, sd),
  data = repay_main_extend_noctrlnochoice
)

# I also want differences between repayment rates.
# Split the summary by arm.
rr_secured <- subset(rr_data, treatmenttype_fac == "Secured")
rr_surprise <- subset(rr_data, treatmenttype_fac == "Surprise Unsecured")
rr_unsecured <- subset(rr_data, treatmenttype_fac == "Unsecured")

# Align the three arms on loandayselapsed by matching on the day value and
# select the mean by column name. The previous version lined the arms up by
# row position and took "column 3", which breaks if any arm has no
# observations on some day. A day missing from an arm now yields NA for the
# differences that involve that arm.
rr_days <- unique(rr_data$loandayselapsed)
rr_mean_secured <- rr_secured[["frac_lpp_maxip.mean"]][
  match(rr_days, rr_secured$loandayselapsed)
]
rr_mean_surprise <- rr_surprise[["frac_lpp_maxip.mean"]][
  match(rr_days, rr_surprise$loandayselapsed)
]
rr_mean_unsecured <- rr_unsecured[["frac_lpp_maxip.mean"]][
  match(rr_days, rr_unsecured$loandayselapsed)
]

# Take differences and stack them in long form, one block of rows per effect:
#   Total Effect = Secured            - Unsecured
#   Moral Hazard = Secured            - Surprise Unsecured
#   Selection    = Surprise Unsecured - Unsecured
rr_data2 <- data.table(
  loandayselapsed = rep(rr_days, times = 3),
  diff = c(
    rr_mean_secured - rr_mean_unsecured,
    rr_mean_secured - rr_mean_surprise,
    rr_mean_surprise - rr_mean_unsecured
  ),
  type = rep(
    c("Total Effect", "Moral Hazard", "Selection"),
    each = length(rr_days)
  )
)
# Factorize the levels, to order as Total Effect, Moral Hazard, Selection
rr_data2$type_fac <- factor(
  rr_data2$type,
  levels = c("Total Effect", "Moral Hazard", "Selection"),
  ordered = TRUE
)
# Graph 1: repayment rates
# One line per treatment arm of the mean fraction repaid against days elapsed.
# Colours follow the factor-level order: Secured = red,
# Surprise Unsecured = orange, Unsecured = green.
# NOTE: the axis `limits` remove observations outside the range (ggplot2's
# default for continuous scales) rather than zooming in on them.
myplot <- ggplot() +
  geom_line(
    data = rr_data,
    aes(
      x = loandayselapsed,
      y = frac_lpp_maxip.mean,
      group = treatmenttype_fac,
      color = treatmenttype_fac
    ),
    size = 0.5
  ) +
  scale_color_manual(values = c("red", "orange", "green")) +
  xlab("Days elapsed since loan creation") +
  ylab("Fraction of principal repaid") +
  scale_y_continuous(
    breaks = c(0.24, 0.36, 0.48, 0.60, 0.72),
    limits = c(0.24, .72)
  ) +
  scale_x_continuous(breaks = c(50, 100, 150, 200), limits = c(50, 200)) +
  labs(color = "Assigned treatment group") +
  theme(
    # Legend anchored to the bottom-right corner of the panel
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )
# Figure 4 Panel (a)
# Pass the plot explicitly instead of relying on last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(
  filename = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/figures/repaymentratesgrey.pdf",
  plot = myplot,
  width = 4,
  height = 3
)
# Graph 2: difference in repayment rates
# One line per effect type of the between-arm difference in mean fraction
# repaid. Colours follow the factor-level order: Total Effect = red,
# Moral Hazard = black, Selection = blue.
# NOTE: the axis `limits` remove observations outside the range (ggplot2's
# default for continuous scales) rather than zooming in on them.
myplot2 <- ggplot() +
  geom_line(
    data = rr_data2,
    aes(x = loandayselapsed, y = diff, group = type_fac, color = type_fac),
    size = 0.5
  ) +
  scale_color_manual(values = c("red", "black", "blue")) +
  xlab("Days elapsed since loan creation") +
  ylab("Difference in fraction of principal repaid") +
  scale_y_continuous(
    breaks = c(0.01, 0.03, 0.05, 0.07, 0.09, 0.11, 0.13),
    limits = c(0.01, 0.13)
  ) +
  scale_x_continuous(breaks = c(50, 100, 150, 200), limits = c(50, 200)) +
  labs(color = "Type of effect") +
  theme(
    # Legend anchored to the bottom-right corner of the panel
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )
# Figure 4 Panel (b)
# Pass the plot explicitly instead of relying on last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(
  filename = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/figures/diff_repayments.pdf",
  plot = myplot2,
  width = 4,
  height = 3
)
library(tidyverse)
library(tidyverse)
library(tidyverse)
### This R script plots loan completion rates, i.e. Figure 4 Panels (c) and (d)
##########################################################################################
## Load packages (note: install them first if not yet installed)
##########################################################################################
#install.packages("tidyverse")
#install.packages("ggplot2")
#install.packages("foreign")
#install.packages("doBy")
#install.packages("data.table")
library(tidyverse)
library(ggplot2)
library(foreign)
library(doBy)
library(data.table)
# Unused but keep in case
#install.packages("gridExtra")
#install.packages("ggpubr")
#library(gridExtra)
#library(ggpubr)
##########################################################################################
## Run data work
##########################################################################################
# Load the dataset, drop observations with loandayselapsed above 200, and
# recode completeloan to a 0/1 indicator ("No" -> 0, any other value -> 1,
# NA stays NA)
repay_main_extend <-
  read.dta("/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/data/repayment/fenix_repay_extend_07172020_strict_rep.dta") %>%
  subset(loandayselapsed <= 200) %>%
  mutate(completeloan = ifelse(completeloan == "No", 0, 1))
# Make treatment assignment labels simpler: raw code -> display label
treatment_labels <- c(
  "R T1-L" = "Secured",
  "R T1-U" = "Surprise Unsecured",
  "R T2-U" = "Unsecured",
  "R T3" = "Choice",
  "R C" = "Control"
)
for (raw_label in names(treatment_labels)) {
  # Rows still carrying this raw code; rows where the comparison is NA are
  # left untouched by the scalar assignment below
  has_raw_label <- repay_main_extend$treatmenttype_sh == raw_label
  repay_main_extend$treatmenttype_sh[has_raw_label] <-
    treatment_labels[[raw_label]]
}
# Ordered-factor copy of the treatment label with an explicit level order
# (needed for graphing). Labels not listed in `levels` become NA.
repay_main_extend$treatmenttype_fac <- factor(
  repay_main_extend$treatmenttype_sh,
  levels = c("Secured", "Surprise Unsecured", "Unsecured", "Choice", "Control"),
  ordered = TRUE
)
# Versions of the data without the control arm, and without control and choice.
# Rows whose label is NA are dropped by both filters.
repay_main_extend_noctrl <- subset(
  repay_main_extend,
  treatmenttype_sh != "Control"
)
repay_main_extend_noctrlnochoice <- subset(
  repay_main_extend_noctrl,
  treatmenttype_sh != "Choice"
)
# Summary dataset: for each value of loandayselapsed and each treatment arm,
# the mean and standard deviation of completeloan (the mean is stored in
# column completeloan.mean, which the completion-rate graph plots).
lcr_data <- summaryBy(
  completeloan ~ loandayselapsed + treatmenttype_fac,
  FUN = c(mean, sd),
  data = repay_main_extend_noctrlnochoice
)

# I also want differences between completion rates.
# Split the summary by arm.
lcr_secured <- subset(lcr_data, treatmenttype_fac == "Secured")
lcr_surprise <- subset(lcr_data, treatmenttype_fac == "Surprise Unsecured")
lcr_unsecured <- subset(lcr_data, treatmenttype_fac == "Unsecured")

# Align the three arms on loandayselapsed by matching on the day value and
# select the mean by column name. The previous version lined the arms up by
# row position and took "column 3", which breaks if any arm has no
# observations on some day. A day missing from an arm now yields NA for the
# differences that involve that arm.
lcr_days <- unique(lcr_data$loandayselapsed)
lcr_mean_secured <- lcr_secured[["completeloan.mean"]][
  match(lcr_days, lcr_secured$loandayselapsed)
]
lcr_mean_surprise <- lcr_surprise[["completeloan.mean"]][
  match(lcr_days, lcr_surprise$loandayselapsed)
]
lcr_mean_unsecured <- lcr_unsecured[["completeloan.mean"]][
  match(lcr_days, lcr_unsecured$loandayselapsed)
]

# Take differences and stack them in long form, one block of rows per effect:
#   Total Effect = Secured            - Unsecured
#   Moral Hazard = Secured            - Surprise Unsecured
#   Selection    = Surprise Unsecured - Unsecured
lcr_data2 <- data.table(
  loandayselapsed = rep(lcr_days, times = 3),
  diff = c(
    lcr_mean_secured - lcr_mean_unsecured,
    lcr_mean_secured - lcr_mean_surprise,
    lcr_mean_surprise - lcr_mean_unsecured
  ),
  type = rep(
    c("Total Effect", "Moral Hazard", "Selection"),
    each = length(lcr_days)
  )
)
# Factorize the levels, to order as Total Effect, Moral Hazard, Selection
lcr_data2$type_fac <- factor(
  lcr_data2$type,
  levels = c("Total Effect", "Moral Hazard", "Selection"),
  ordered = TRUE
)
############################
# Graph 1: completion rates
# One line per treatment arm of the mean of the 0/1 completeloan indicator
# against days elapsed. Colours follow the factor-level order: Secured = red,
# Surprise Unsecured = orange, Unsecured = green.
# NOTE: the axis `limits` remove observations outside the range (ggplot2's
# default for continuous scales) rather than zooming in on them.
myplot <- ggplot() +
  geom_line(
    data = lcr_data,
    aes(
      x = loandayselapsed,
      y = completeloan.mean,
      group = treatmenttype_fac,
      color = treatmenttype_fac
    ),
    size = 0.5
  ) +
  scale_color_manual(values = c("red", "orange", "green")) +
  xlab("Days elapsed since loan creation") +
  ylab("Fraction completed") +
  scale_y_continuous(
    breaks = c(0.21, 0.32, 0.43, 0.54, 0.65),
    limits = c(0.21, .65)
  ) +
  scale_x_continuous(breaks = c(100, 150, 200), limits = c(100, 200)) +
  labs(color = "Assigned treatment group") +
  theme(
    # Legend anchored to the bottom-right corner of the panel
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )
# Figure 4 Panel (c)
# Pass the plot explicitly instead of relying on last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(
  filename = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/figures/loancompleteratesgrey.pdf",
  plot = myplot,
  width = 4,
  height = 3
)
# Graph 2: difference in completion rates
# One line per effect type of the between-arm difference in mean completion.
# Colours follow the factor-level order: Total Effect = red,
# Moral Hazard = black, Selection = blue.
# NOTE: the axis `limits` remove observations outside the range (ggplot2's
# default for continuous scales) rather than zooming in on them.
myplot2 <- ggplot() +
  geom_line(
    data = lcr_data2,
    aes(x = loandayselapsed, y = diff, group = type_fac, color = type_fac),
    size = 0.5
  ) +
  scale_color_manual(values = c("red", "black", "blue")) +
  xlab("Days elapsed since loan creation") +
  ylab("Difference in fraction completed") +
  scale_y_continuous(
    breaks = c(-0.03, 0.01, 0.05, 0.09, 0.13, 0.17),
    limits = c(-0.03, .17)
  ) +
  scale_x_continuous(breaks = c(100, 150, 200), limits = c(100, 200)) +
  labs(color = "Type of effect") +
  theme(
    # Legend anchored to the bottom-right corner of the panel
    legend.justification = c("right", "bottom"),
    legend.position = c(1.00, .00),
    legend.background = element_rect(fill = NA),
    panel.background = element_rect(fill = NA),
    panel.border = element_rect(fill = NA, color = "grey75"),
    axis.ticks = element_line(color = "grey85"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 3),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.5, "line"),
    plot.title = element_text(size = 10),
    text = element_text(size = 10)
  )
# Figure 4 Panel (d)
# Pass the plot explicitly instead of relying on last_plot(), and spell out
# `filename` instead of relying on partial matching of `file`.
ggsave(
  filename = "/Users/rpickmans/Library/CloudStorage/Dropbox/Fenix Solar/02-schoolFeeLoans/Replication/figures/diff_complete.pdf",
  plot = myplot2,
  width = 4,
  height = 3
)
